library(tidyverse)
library(leaflet)Number of categories were reduced from 39 to 15 by combining common crime types under one category. This reduction in detail is to help gain quick insights while plotting the data. It is not expected to significantly affect the analysis.
Categories <- read_csv("Categories.csv")incidents_house_price datasetLoad the incidents_house_price dataset from the previous data cleaning exercise.
incidents_house_price <- read_csv("incidents_house_price.csv")incidents_house_price datasetincidents_new_categories <- left_join(incidents_house_price, Categories, by = "Category") %>%
select(-n) %>%
rename(New_Category = `New Category`) zipc <- incidents_new_categories %>%
select(zipcode, New_Category, latitude, longitude) %>%
count(zipcode, New_Category, latitude, longitude) %>%
arrange(desc(n))filt_zipc <- zipc %>% filter(n > 10) %>%
mutate(leaflet_labels = paste0(New_Category, " (", n, ")")) %>%
mutate(leaflet_radius = findInterval(n, c(50, 100, 200, 300, 400, 500, 600,
700, 800, 900, 1000, 2000, 5000, 10000)))
filt_zipc$New_Category <- as.factor(filt_zipc$New_Category)The number of crimes for each category are shown on a map of San Francisco using the leaflet library. This provides a spatial visualization of areas with high concentration of crimes and crime-types. The tool tip indicates the category followed by the number of crimes in parenthesis ().
col_pal <- colorFactor(palette = "magma", levels =
levels(filt_zipc$New_Category))
leaflet() %>%
setView(lng = -122.4164, lat = 37.7766, zoom = 12) %>%
addTiles() %>%
addCircleMarkers(filt_zipc, lng = filt_zipc$longitude, lat = filt_zipc$latitude,
weight = 5, radius = filt_zipc$leaflet_radius * 1.5, fillOpacity = 0.8,
color = col_pal(filt_zipc$New_Category),
label = filt_zipc$leaflet_labels) %>%
addLegend("topright", col_pal, values = filt_zipc$New_Category)incidents_new_categories %>%
ggplot(aes(x = New_Category)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
scale_y_continuous(labels=function(n){format(n, scientific = FALSE)},
breaks = seq(0,500000,by = 25000)) +
labs(x = "Categories of crime", y = "Number of crimes") +
coord_flip()The pie chart below is to assess the variation in the number of crimes by day of the week.
incidents_by_day <- incidents_new_categories %>%
select(DayOfWeek, New_Category) %>%
count(DayOfWeek) pie <- incidents_by_day %>%
ggplot(aes(x = factor(1), y = n, fill = DayOfWeek)) +
geom_bar(width = 1,stat="identity", color = "black") +
guides(fill=guide_legend(override.aes=list(colour=NA))) +
coord_polar(theta = "y") +
theme(axis.ticks=element_blank(),
axis.title=element_blank(),
axis.text.y=element_blank())
y.breaks <- cumsum(incidents_by_day$n) - incidents_by_day$n/2
pie +
theme(axis.text.x=element_text(color='black')) +
scale_y_continuous(breaks=y.breaks, labels=incidents_by_day$DayOfWeek) +
geom_text(aes(y=y.breaks, label = n), size=3)The heatmap below shows the number of crimes by month and year. It allows for easy visualization of hot-spots, i.e., month-year combinations with high crime-rates.
incidents_by_month <- incidents_new_categories %>%
select(year, month) %>%
count(year, month)incidents_by_month %>%
mutate_at(c("month"), as.factor) %>%
mutate_at(c("year"), as.factor) %>%
filter(n > 7000) %>%
ggplot(aes(x = month, y = year)) +
geom_tile(aes(fill = n)) +
scale_fill_gradient(low = "white", high = "darkred") +
labs(x = "Months", y = "Number of crimes") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0)) +
scale_x_discrete(labels=month.name) The bar plot below shows the number of incidents resolved for each PD district. The visualization allows for easy comparison of the relative resolution of the number of cases for each district.
resolution <- incidents_new_categories %>%
select(New_Category, PdDistrict,Resolution) %>%
mutate(Resolved = ifelse(Resolution == "NONE" , 0, 1))resolution %>%
mutate(Resolved = ifelse(Resolved == 0, "N", "Y")) %>%
mutate_at(c("Resolved"), .funs = as.factor) %>%
ggplot(aes(x = Resolved)) +
geom_bar(aes(color = Resolved, fill = Resolved)) +
facet_grid(~PdDistrict) +
scale_fill_manual(values = c("DarkRed", "DarkGreen")) +
scale_color_manual(values = c("DarkRed", "DarkGreen")) +
theme_bw() +
theme(legend.position="none")